In [1]:
    
# Connect to the IPython.parallel cluster. Per the original note ("enable magic"),
# creating the Client is what makes the %%px cell magic used in the next cell available.
# NOTE(review): IPython.parallel was later split out into the separate `ipyparallel`
# package -- confirm which one the target environment provides before running.
from IPython.parallel import Client
c = Client()
    
In [2]:
    
%%px --local
import numpy as np
import pandas as pd
from kobra.dr import Labels
from kobra.tr_utils import prep_out_path, time_now_str
import os
from os import path
import shutil
import mahotas as mh
import mahotas.labeled as mhl
import cv2
from kobra.dr import ImageReader
import time
# --- Input / output locations (absolute paths; adjust to the local data layout) ---
# CSV of image labels; loaded into `names` further down in this cell.
labels_file =  '/kaggle/retina/trainLabels.csv'
# Directory that is listed for input images and passed to ImageReader as its first argument.
root = '/kaggle/retina/reduced/test'
# Passed to ImageReader as its third argument -- presumably the per-image masks; confirm in kobra.dr.
masks_dir = '/kaggle/retina/test/prepmasks'
# Directory that receives the resulting feature CSV.
features_path = '/kaggle/retina/reduced/features/test'
# Base name of the output file: <features_path>/<prefix>.csv
prefix = 'features'
def get_predicted_region(im, marker):
    """Return a copy of ``im`` in which only the pixels equal to ``marker`` survive.

    Every element that differs from ``marker`` is zeroed; matching elements keep
    their value. The input array is left untouched and the result has the same
    shape and dtype.
    """
    keep = im == marker
    region = np.zeros_like(im)
    region[keep] = im[keep]
    return region
# Label table; get_areal_features only consults it (by its 'image' column) when isTest is False.
names = pd.read_csv(labels_file)
# Number of histogram bins per region type; each feature row carries 2 * n_bins counts.
n_bins = 100
# True: rows are tagged with the file stem and a placeholder level of -1.
# False: rows are tagged with the matching row of `names`.
isTest = True
    
In [3]:
    
# Prepare the output directory.
# NOTE(review): presumably creates (or resets) features_path -- confirm in kobra.tr_utils.
prep_out_path(features_path)
# os.listdir returns entries in arbitrary order; sorting makes the row order of the
# resulting feature CSV reproducible from run to run.
files = sorted(os.listdir(root))
    
In [4]:
    
def get_areal_features(f):
    """Build the region-size feature row for one image.

    For each of the two markers (``Labels.Drusen``, ``Labels.Haemorage``) the
    pixels carrying that marker are isolated, connected regions are labelled
    with a 5x5 structuring element, and the region sizes -- expressed as a
    fraction of the non-zero mask area -- are histogrammed into ``n_bins`` bins
    over ``[0, 1e-3]``.

    Relies on module-level names being defined wherever it runs: ``isTest``,
    ``names``, ``root``, ``masks_dir``, ``n_bins``, ``get_predicted_region``,
    ``ImageReader``, ``Labels``, ``mh``, ``mhl``, ``cv2``, ``np`` and ``path``.

    Parameters
    ----------
    f : str
        File name of the image inside ``root``.

    Returns
    -------
    numpy.ndarray
        1-D object array: ``n_bins`` drusen counts, ``n_bins`` haemorrhage
        counts, then the identifying tail. In test mode the tail is
        ``[file stem, -1]``; otherwise it is the first row of ``names`` whose
        ``'image'`` equals the file stem (presumably ``[image, level]`` --
        confirm against the labels CSV).

    Raises
    ------
    IndexError
        In training mode, when no row of ``names`` matches the file stem.
    """
    stem = path.splitext(f)[0]
    if isTest:
        # dtype=object keeps the stem a str and the level an int, so appending
        # the tail does not coerce the histogram counts to strings.
        tail = np.array([stem, -1], dtype=object)
    else:
        # Resolved up front so a missing label fails before any image work is done.
        tail = names.loc[names['image'] == stem].values[0]

    imr = ImageReader(root, f, masks_dir, gray_scale = True)
    # Normalise region sizes by the number of non-zero mask pixels.
    area = float(cv2.countNonZero(imr.mask))
    Bc = np.ones((5, 5))

    parts = []
    for marker in (Labels.Drusen, Labels.Haemorage):
        region = get_predicted_region(imr.image, marker)
        labeled, _ = mh.label(region, Bc)
        # sizes excluding background (entry 0)
        sizes = mhl.labeled_size(labeled)[1:] / area
        counts, _ = np.histogram(sizes, bins=n_bins, range=(0, 1e-3))
        parts.append(counts)

    parts.append(tail)
    return np.concatenate(parts)
    
In [5]:
    
# Smoke test: run the extractor directly in this kernel on a single image
# before the work is distributed over the cluster in the next cell.
f = '1_left.png'
get_areal_features(f)
    
    Out[5]:
In [6]:
    
# Distribute the per-image extraction over the cluster, reusing the client
# opened in the first cell rather than opening a second connection.
dv = c.load_balanced_view()
features = dv.map(get_areal_features, np.array(files))
# Block until every task has finished before collecting the rows.
features.wait()
# One column per histogram bin (drusen bins first, then haemorrhage bins),
# followed by the identifying columns. list(...) keeps the concatenation valid
# on Python 3, where range() is no longer a list.
feature_columns = list(range(n_bins * 2)) + ['name', 'level']
df = pd.DataFrame(data = features[:], columns = feature_columns)
df.to_csv(path.join(features_path, prefix + ".csv"), index = False, header=True)